# Fig. S17: Green Manufacturing Project Announcements Over Time
# Alex Gazmararian
# agazmararian@gmail.com
#
# Creates bar plot showing number of green manufacturing project announcements
# by quarter with IRA passage marked.
#
# Note: The raw BGM data is restricted. For replication, this script uses a
# cached quarterly summary. If the cache is not available, the script builds
# the summary from the restricted file in extdata and writes the cache.

library(tidyverse)
library(here)
library(zoo)

message("=== GENERATING FIG. S17: GREEN MANUFACTURING ANNOUNCEMENTS ===")

# Project-relative file locations ----
# Destination of the rendered figure
output_path <- here("output", "pnas", "figures", "fig_S17_green_mfg_plot.pdf")
# Cached quarterly counts that stand in for the restricted raw data
cache_file <- here("data", "cache", "green_mfg_summary.rds")

# Load the quarterly summary ----
# `quarterly_summary` has one row per quarter with columns `year_quarter`
# (zoo yearqtr) and `n_projects` (count). The cached copy is preferred so the
# figure can be replicated without the restricted raw data; otherwise the
# summary is rebuilt from extdata and written to the cache.
if (file.exists(cache_file)) {
  message("[OK] Using cached green manufacturing summary from data/cache/")
  quarterly_summary <- readRDS(cache_file)

  # Fail early with a clear message if the cache is not the expected table;
  # otherwise the error would only surface later inside ggplot.
  if (!all(c("year_quarter", "n_projects") %in% names(quarterly_summary))) {
    stop("Cached summary is missing `year_quarter` and/or `n_projects`: ",
         cache_file, "\n",
         "Delete the cache file to rebuild it from extdata.")
  }

} else {
  # Need to process from restricted extdata
  message("Processing green manufacturing data from restricted extdata...")

  extdata_file <- here("extdata", "bgm_turner", "The-Big-Green-Machine Dataset.xlsx")
  if (!file.exists(extdata_file)) {
    stop("BGM data file not found: ", extdata_file, "\n",
         "This is restricted data. Please either:\n",
         "1. Obtain the data from https://sites.google.com/view/biggreenmanufacturing\n",
         "2. Use the cached summary in data/cache/green_mfg_summary.rds")
  }

  # Only needed on this path, so they are attached here rather than at the top
  library(readxl)
  library(janitor)

  # Load and process data
  supdat <- read_xlsx(extdata_file, sheet = "Archived Dataset-4-19-25", progress = FALSE)
  supdat <- clean_names(supdat)

  # Keep US projects and drop rows explicitly marked as having no date
  supdat <- supdat %>%
    filter(country == "USA") %>%
    filter(!project_announcement_date %in% c("n.d.", "N.D."))

  # Process dates (Excel serial dates and literal years)
  # NOTE(review): this assumes the date column is read as text holding either
  # a four-digit year or an Excel serial number - confirm against the workbook.
  excel_serial_dates <- supdat %>%
    filter(!is.na(project_announcement_date)) %>%
    filter(project_announcement_date != "") %>%
    mutate(
      # Non-numeric strings become NA here; the coercion warning is expected
      numeric_date = suppressWarnings(as.numeric(project_announcement_date)),
      date = case_when(
        # Literal years (1900-2030) are placed on January 1 of that year.
        # case_when() evaluates this expression for every row, including
        # serial-date and NA rows, so an explicit `format` is required:
        # without it as.Date() errors when the first element is not a
        # parseable date string. With it, non-matching strings yield NA.
        !is.na(numeric_date) & numeric_date >= 1900 & numeric_date <= 2030 &
          numeric_date == round(numeric_date) ~
          as.Date(paste0(numeric_date, "-01-01"), format = "%Y-%m-%d"),
        # Excel serial dates
        !is.na(numeric_date) ~
          as.Date(numeric_date, origin = "1899-12-30"),
        TRUE ~ NA_Date_
      )
    ) %>%
    select(-numeric_date)

  # Surface rows whose date could not be interpreted; they are dropped by the
  # date filter below because comparisons with NA are not TRUE.
  n_unparsed <- sum(is.na(excel_serial_dates$date))
  if (n_unparsed > 0) {
    message("Dropping ", n_unparsed, " project(s) with unparseable announcement dates")
  }

  # Create quarterly summary
  # NOTE(review): the lower bound is strict, so rows dated exactly 2000-01-01
  # (which includes any literal-year "2000" entries) are excluded - confirm
  # this is intended.
  quarterly_summary <- excel_serial_dates %>%
    filter(date > as.Date("2000-01-01") & date < as.Date("2025-01-01")) %>%
    mutate(year_quarter = as.yearqtr(date)) %>%
    group_by(year_quarter) %>%
    summarize(
      n_projects = n(),
      .groups = "drop"
    )

  # Cache the summary for replication
  dir.create(dirname(cache_file), recursive = TRUE, showWarnings = FALSE)
  saveRDS(quarterly_summary, cache_file)
  message("[OK] Cached quarterly summary to: ", cache_file)
}

# Report the size of the data about to be plotted
message(
  "Creating plot with ", sum(quarterly_summary$n_projects),
  " total projects across ", nrow(quarterly_summary), " quarters"
)

# Create figure ----
# Quarter containing the IRA date shown in the annotation label
ira_quarter <- as.yearqtr("2022 Q3")
# One axis break per year (first quarter of each year), labelled by year only
year_breaks <- seq(as.yearqtr("2000 Q1"), as.yearqtr("2024 Q4"), by = 1)

fig_s17 <- ggplot(quarterly_summary, aes(x = year_quarter, y = n_projects)) +
  # Quarterly counts as bars
  geom_col(fill = "steelblue", alpha = 0.8) +
  # Dashed reference line at the IRA quarter
  geom_vline(
    xintercept = ira_quarter,
    linetype = "dashed",
    color = "red",
    linewidth = 1
  ) +
  # Label pinned to the top of the panel (y = Inf), just left of the line
  annotate(
    "text",
    x = ira_quarter,
    y = Inf,
    label = "IRA passed\nAug. 16, 2022",
    hjust = 1.05,
    vjust = 1.1,
    size = 3
  ) +
  scale_x_yearqtr(
    breaks = year_breaks,
    labels = function(x) format(x, "%Y"),
    expand = c(0, 0)
  ) +
  # No padding, so bars sit directly on the x axis
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic(base_size = 14) +
  # Only x and y are mapped to aesthetics, so the legend placement below and
  # the NULL legend titles in labs() have no visible effect on this plot.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    legend.position = "inside",
    legend.position.inside = c(.1, .9)
  ) +
  labs(
    x = "Year-Quarter",
    y = "Number of projects",
    fill = NULL,
    color = NULL,
    linetype = NULL
  )

# Save figure ----
# Create the output directory first: ggsave() does not create missing folders
# by default, so a fresh checkout without output/pnas/figures would fail here.
# This mirrors how the cache directory is created before saveRDS().
dir.create(dirname(output_path), recursive = TRUE, showWarnings = FALSE)

# Width-to-height follows the golden ratio (1.618); `scale` multiplies both
# dimensions, so the written PDF is 1.5 times the stated size.
ggsave(
  output_path,
  plot = fig_s17,
  width = 6.5, height = 6.5 / 1.618,
  scale = 1.5
)

message("[OK] Fig. S17 saved to: ", output_path)
